
# Project-local path configuration; the functions below read PATHS.dropbox and PATHS.figures.
from init import PATHS
import logging
# Module-level logger used by main() to mark the start and end of a run.
LOGGER = logging.getLogger(__name__)
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
# NOTE(review): the bare 'seaborn' style name was deprecated in matplotlib 3.6
# (renamed to 'seaborn-v0_8') -- confirm the pinned matplotlib version still accepts it.
matplotlib.style.use('seaborn')
import seaborn as sns
# Called after style.use(), so seaborn's "white" settings are applied last.
sns.set(style="white")
# Module-wide colour palette, indexed by the plotting functions below.
pal = sns.color_palette('colorblind')
from C_PatentVariables import conventions_names_colors, firm_year_aggregation, b_patent_oems
# Shared naming/colour conventions; of these, the code visible here only reads dict_var_colors.
dict_subsector_shortnames, dict_var_colors, list_of_subsectors_in_cleancars = conventions_names_colors.main()


def main():
    """Produce every figure generated by this module.

    Logs a start marker, runs the three plotting routines in a fixed order,
    then logs an end marker.
    """
    LOGGER.info('Begin c_othergraphs_spillovers.py')
    plotting_steps = (
        plot_BatFC_Patenting_percent_from_MotorVehicle,
        plot_bat_fc_patenting_bysector_moresectors,
        plot_citations_OEMs_vs_nonOEMS,
    )
    for make_figure in plotting_steps:
        make_figure()
    LOGGER.info('END c_othergraphs_spillovers.py')


def plot_BatFC_Patenting_percent_from_MotorVehicle():
    """Plot, per filing year, the percentage of battery / fuel-cell records from the motor-vehicle sector.

    Reads ``Families_Bat_FC_naics_info.csv`` and keeps rows whose
    ``earliest_filing_year`` lies in 1990-2015 (inclusive). For each of the
    two ``Sub-sector_exclusive`` values ('batteries', 'fuel cells') it counts
    distinct ``docdb_bvd`` keys per year, once for rows flagged as motor
    vehicle (``MotorVehicle > 0`` or ``OEMorSubsidiary > 0``) and once for all
    rows, and plots ``100 * flagged / all``.

    The figure is written as PNG and PDF under
    ``PATHS.figures / 'cross_sectoral_spillovers'``. Returns ``None``.
    """
    df_batfc_fam = pd.read_csv(PATHS.dropbox / 'Data_outputted/C_PatentVariables/Families_Bat_FC_naics_info.csv')
    # Counting key: family id joined with bvdid (a missing bvdid becomes an empty suffix).
    df_batfc_fam['docdb_bvd'] = df_batfc_fam['docdb_family_id'].astype(str) + '_' + df_batfc_fam['bvdid'].fillna('')

    in_year_window = df_batfc_fam['earliest_filing_year'].isin(range(1990, 2016))
    flagged_motor = df_batfc_fam['MotorVehicle'].notnull() & (df_batfc_fam['MotorVehicle'] > 0)
    flagged_oem = df_batfc_fam['OEMorSubsidiary'].notnull() & (df_batfc_fam['OEMorSubsidiary'] > 0)
    # The sector mask does not depend on the technology, so it is built once.
    motor_sector = flagged_motor | flagged_oem

    # (Sub-sector_exclusive value, legend label, key into dict_var_colors), in plotting order.
    technologies = (
        ('batteries', 'Battery', 'Count_Bat_excl'),
        ('fuel cells', 'Fuel Cells', 'Count_FC_excl'),
    )

    # Explicit figure/axes so the plot never lands on a figure left open elsewhere.
    fig, ax = plt.subplots()
    for subsector, label, color_key in technologies:
        rows = (df_batfc_fam['Sub-sector_exclusive'] == subsector) & in_year_window
        motor_counts = df_batfc_fam[rows & motor_sector].groupby('earliest_filing_year')['docdb_bvd'].nunique()
        total_counts = df_batfc_fam[rows].groupby('earliest_filing_year')['docdb_bvd'].nunique()
        # Division aligns on the year index; years absent from motor_counts yield NaN.
        share_percent = 100 * motor_counts / total_counts
        ax.plot(share_percent, label=label, linewidth=3, color=dict_var_colors[color_key][1])

    ax.legend(ncol=1)
    ax.set_ylim(0)
    ax.set_xlabel('Year')
    ax.set_ylabel('Patents From Motor Vehicle Sector (%)')
    fig.savefig(PATHS.figures / 'cross_sectoral_spillovers' / 'BatFC_Patenting_percent_from_MotorVehicle.png', bbox_inches='tight')
    fig.savefig(PATHS.figures / 'cross_sectoral_spillovers' / 'BatFC_Patenting_percent_from_MotorVehicle.pdf', bbox_inches='tight')
    plt.close(fig)



def plot_bat_fc_patenting_bysector_moresectors():
    """Plot yearly battery and fuel-cell record counts, split by sector flag.

    Reads ``Families_Bat_FC_naics_info.csv``, keeps rows whose
    ``earliest_filing_year`` lies in 1990-2015 (inclusive) and draws two
    side-by-side panels (left: ``Sub-sector_exclusive == 'batteries'``,
    right: ``'fuel cells'``). Each panel has one line per sector showing the
    number of distinct ``docdb_bvd`` keys per year among rows whose sector
    column is non-null and strictly positive. A single shared legend is
    placed below the panels.

    The figure is written as PDF and PNG under
    ``PATHS.figures / 'cross_sectoral_spillovers'``. Returns ``None``.
    """
    df_batfc_fam = pd.read_csv(PATHS.dropbox / 'Data_outputted/C_PatentVariables/Families_Bat_FC_naics_info.csv')
    # Counting key: family id joined with bvdid (a missing bvdid becomes an empty suffix).
    df_batfc_fam['docdb_bvd'] = df_batfc_fam['docdb_family_id'].astype(str) + '_' + df_batfc_fam['bvdid'].fillna('')
    in_year_window = df_batfc_fam['earliest_filing_year'].isin(range(1990, 2016))

    def flagged(column):
        """Boolean mask of rows where *column* is present and strictly positive."""
        values = df_batfc_fam[column]
        return values.notnull() & (values > 0)

    # (row mask, legend label, line style) in plotting order. The masks do not
    # depend on the panel, so they are computed once rather than per subplot.
    sector_lines = (
        (flagged('MotorVehicle') | flagged('OEMorSubsidiary'),
         'Motor Vehicle, OEMs and Subsidiaries', {'color': pal[3], 'linewidth': 3}),
        (flagged('Electronics'),
         'Electronics', {'color': pal[0], 'linewidth': 3}),
        (flagged('MachineryChemical'),
         'Machinery and Chemical Manufacturing', {'color': pal[-6], 'linewidth': 2}),
        (flagged('OtherManufacturing'),
         'Other Manufacturing', {'color': pal[-4], 'linewidth': 2}),
        (flagged('EducationRD'),
         'Education and R&D', {'color': pal[2], 'linewidth': 2}),
        (flagged('OtherTransport'),
         'Other Transport', {'color': pal[1], 'linewidth': 2}),
        (flagged('Utilities'),
         'Utilities', {'color': pal[-5], 'linewidth': 2}),
        (flagged('OtherSector'),
         'Other Sector', {'color': pal[-3], 'linewidth': 2, 'linestyle': '-'}),
    )

    # (Sub-sector_exclusive value, panel title), left to right.
    panels = (
        ('batteries', 'Battery patents'),
        ('fuel cells', 'Fuel cell patents'),
    )

    fig, axes = plt.subplots(1, 2, figsize=(15, 6))
    for ax, (subsector, title) in zip(axes, panels):
        panel_rows = (df_batfc_fam['Sub-sector_exclusive'] == subsector) & in_year_window
        for sector_rows, label, line_style in sector_lines:
            yearly_counts = (
                df_batfc_fam[panel_rows & sector_rows]
                .groupby('earliest_filing_year')['docdb_bvd']
                .nunique()
            )
            ax.plot(yearly_counts, label=label, **line_style)
        ax.set_xlabel('Year')
        ax.set_ylabel('Number of DocDB Families')
        ax.set_title(title)

    # Leave room under the panels for the shared legend.
    fig.subplots_adjust(bottom=0.23)
    # Both panels carry identical labels, so the first panel's handles serve the whole figure.
    handles, labels = axes[0].get_legend_handles_labels()
    fig.legend(handles, labels, loc='lower center', ncol=3, fontsize=8, fancybox=True, frameon=True, edgecolor='black', framealpha=.5, borderpad=.7)
    fig.savefig(PATHS.figures / 'cross_sectoral_spillovers' / 'BatFC_Patenting_bysector_moresectors.pdf', bbox_inches='tight')
    fig.savefig(PATHS.figures / 'cross_sectoral_spillovers' / 'BatFC_Patenting_bysector_moresectors.png', bbox_inches='tight')
    plt.close(fig)


def get_BatFCfamilies():
    """Return battery / fuel-cell families joined with overlap data and filing year.

    Steps:
      1. Read ``Families_of_ipc_cpc_transpo_cpc_ipc.csv`` and keep the distinct
         (``docdb_family_id``, ``Sub-sector``) pairs whose ``Sub-sector``
         contains 'Batteries' or 'Fuel Cells'. Rows with a missing
         ``Sub-sector`` are excluded.
      2. Left-merge ``Families_overlap.csv`` onto those pairs by
         ``docdb_family_id``.
      3. Attach ``earliest_filing_year`` from ``Families_of_ipc_cpc_transpo.csv``
         with a right merge, so every row from step 2 is kept even when no
         filing year is found.

    Returns:
        pandas.DataFrame: the merged table, with ``docdb_family_id`` and
        ``earliest_filing_year`` as the leading columns.
    """
    families = pd.read_csv(PATHS.dropbox / 'Data_outputted/C_PatentVariables/Families_of_ipc_cpc_transpo_cpc_ipc.csv')
    # na=False makes missing sub-sectors an explicit False instead of relying
    # on NaN being coerced when the masks are combined.
    is_bat_or_fc = families['Sub-sector'].str.contains('Batteries|Fuel Cells', na=False)
    bat_fc_families = families.loc[is_bat_or_fc, ['docdb_family_id', 'Sub-sector']].drop_duplicates()

    overlap = pd.read_csv(PATHS.dropbox / 'Data_outputted/C_PatentVariables/Families_overlap.csv')
    bat_fc_overlap = bat_fc_families.merge(overlap, on='docdb_family_id', how='left')

    filing_years = pd.read_csv(PATHS.dropbox / 'Data_outputted/C_PatentVariables/Families_of_ipc_cpc_transpo.csv')
    filing_years = filing_years[['docdb_family_id', 'earliest_filing_year']].drop_duplicates()
    # Right merge: the battery / fuel-cell rows drive the result; the year is only looked up.
    return filing_years.merge(bat_fc_overlap, on='docdb_family_id', how='right')


def plot_citations_OEMs_vs_nonOEMS():
    """Plot yearly backward-citation counts split into OEM and non-OEM series.

    Reads the battery and fuel-cell citation time series
    (``Citations_backward_Timeseries_batteryOEMs.csv`` and
    ``Citations_backward_Timeseries_fuelcellOEMs.csv``), keeps ``Year`` values
    2000-2014 (inclusive) and derives ``nonOEMs = Total - OEMs``. Non-OEM
    series are drawn as thick solid lines and OEM series as thinner dashed
    lines, with one colour per technology.

    The figure is written as PDF and PNG under
    ``PATHS.figures / 'cross_sectoral_spillovers'``. Returns ``None``.
    """
    year_window = range(2000, 2015)
    # (legend tag, CSV path relative to PATHS.dropbox, line colour), in plotting order.
    technologies = (
        ('Bat', 'Data_outputted/C_PatentVariables/Citations_backward_Timeseries_batteryOEMs.csv', pal[-1]),
        ('FC', 'Data_outputted/C_PatentVariables/Citations_backward_Timeseries_fuelcellOEMs.csv', pal[3]),
    )

    loaded = []
    for tag, relative_path, color in technologies:
        counts = pd.read_csv(PATHS.dropbox / relative_path)
        counts = counts[counts['Year'].isin(year_window)].set_index('Year')
        counts['nonOEMs'] = counts['Total'] - counts['OEMs']
        loaded.append((tag, counts, color))

    fig, ax = plt.subplots(figsize=[10, 6], facecolor='white')
    # All non-OEM lines first, then all OEM lines, so the legend order is
    # Non-OEM (Bat), Non-OEM (FC), OEM (Bat), OEM (FC).
    for tag, counts, color in loaded:
        ax.plot(counts['nonOEMs'], color=color, label=f'To Non-OEMs ({tag})', linewidth=3)
    for tag, counts, color in loaded:
        ax.plot(counts['OEMs'], color=color, label=f'To OEMs ({tag})', linewidth=2, linestyle='--')

    # Set the lower bound only after every series is drawn: setting a limit
    # turns off y-autoscaling, so doing it earlier would freeze the upper
    # limit from the non-OEM lines alone and could clip the OEM lines.
    ax.set_ylim(bottom=0)
    ax.set_xlabel('Years')
    ax.set_ylabel("Nbr of Backward Citations")
    ax.legend(ncol=1, loc="best", frameon=False)
    fig.savefig(PATHS.figures / 'cross_sectoral_spillovers' / 'bckwdcit_nbr_goingto_nonoem_and_oems.pdf', bbox_inches='tight')
    fig.savefig(PATHS.figures / 'cross_sectoral_spillovers' / 'bckwdcit_nbr_goingto_nonoem_and_oems.png', bbox_inches='tight')
    plt.close(fig)
